R 语言结构方程模型：尝试用 R 画一个 PNAS 文章的 SEM 图

原创 ecologyR ecologyR 2024-03-25

引言

新鲜出炉的 PNAS 文章，里面的结构方程模型代码是公开的（数据也在文章附件），

Positive associations fuel soil biodiversity and ecological networks worldwide

不少人应该注意到了，Dr. Manuel Delgado-Baquerizo 指导的论文中的结构方程模型的图很有特色：在结构方程模型图中，将不同类别的预测因子归入一个方框，路径系数在连线旁备注，以实现简洁的视觉效果，

Grouped the different categories of predictors into the same box in the model for graphical simplicity

又如下面一篇文章的图 2，

Multiple elements of soil biodiversity drive ecosystem functions across biomes

在此，研究了怎么使用 R 实现类似的效果（探索了初步效果，未完全完成），

0. 包

# 加载包
library(tidyverse)
library(tidygraph)
library(ggraph)
library(piecewiseSEM)
library(patchwork)

1. 数据

# 读取附件数据
# data <- read.csv("data/metanew.csv", row.names = 1)
# data[, 5:44] <- scale(data[, 5:44]) # 若仅仅使用 scale 函数标准化后，可能会使 piecewiseSEM 报错
# data <- as.data.frame(data)

# 或者写成
data <- read.csv("data/metanew.csv", row.names = 1) |> 
  mutate_if(
    is.numeric,
    ~ as.numeric(scale(.x)) # 为了 piecewiseSEM 此处 scale 需要搭配 as.numeric 一起使用
  )

2. 结构方程模型

# 对 cycfac motif 的 SEM 拟合
sem.model1 <- psem(
  lm(
    biodiversity ~ fragility + cycfac_total + modularity + # 生物
      Distance_from_equator + Elevation + # 空间
      TSEA + MAP + # 气候
      pH + Texture + Soil_C, # 土壤
    data = data
  ),
  lm(
    fragility ~ cycfac_total + modularity + # 生物
      Distance_from_equator + Elevation + # 空间
      TSEA + MAP + # 气候
      pH + Texture + Soil_C, # 土壤
    data = data
  ),
  lm(
    modularity ~ cycfac_total + # 生物
      Distance_from_equator + Elevation + # 空间
      TSEA + MAP + # 气候
      pH + Texture + Soil_C, # 土壤
    data = data
  ),
  lm(
    cycfac_total ~ Distance_from_equator + Elevation + # 空间
      TSEA + MAP + # 气候
      pH + Texture + Soil_C, # 土壤
    data = data
  ),
  lm(
    pH ~ Distance_from_equator + Elevation + # 空间
      TSEA + MAP, # 气候
    data = data
  ),
  lm(
    Soil_C ~ Distance_from_equator + Elevation + # 空间
      TSEA + MAP + # 气候
      pH + Texture, # 土壤
    data = data
  ),
  lm(
    Texture ~ Distance_from_equator + Elevation + # 空间
      TSEA + MAP, # 气候
    data = data
  ),
  lm(
    TSEA ~ Distance_from_equator + Elevation, # 空间
    data = data
  ),
  lm(
    MAP ~ Distance_from_equator + Elevation + # 空间
      TSEA, # 气候
    data = data
  )
)

3. 查看模型结果

拟合的不错，

# 查看 SEM 结果
# dSep 的 P 值都不显著，则拟合得不错
dSep(sem.model1, .progressBar = FALSE) |> print()

##       Independ.Claim Test.Type  DF Crit.Value   P.Value 
## 1 Texture ~ pH + ...      coef 585  -1.505681 0.1326888

# fisherC 的 P 值大于 0.05，则拟合得不错
fisherC(sem.model1) |> print()

##   Fisher.C df P.Value
## 1    4.039  2   0.133

# 随机抽取 10 行结果看看（整个结果表格非常长）
coefs(sem.model1) |>
  select(1:7) |>
  slice_sample(n = 10) |>
  print()

##        Response             Predictor Estimate Std.Error  DF Crit.Value P.Value
## 1            pH Distance_from_equator   0.0171    0.0406 586     0.4204  0.6744
## 2  cycfac_total Distance_from_equator  -0.0617    0.0389 583    -1.5870  0.1130
## 3          TSEA             Elevation  -0.2323    0.0439 588    -5.2962  0.0000
## 4    modularity                  TSEA  -0.2543    0.0462 582    -5.5024  0.0000
## 5     fragility                  TSEA  -0.1916    0.0396 581    -4.8361  0.0000
## 6  cycfac_total             Elevation  -0.0933    0.0445 583    -2.0961  0.0365
## 7     fragility Distance_from_equator   0.0096    0.0332 581     0.2885  0.7731
## 8     fragility                    pH  -0.0734    0.0339 581    -2.1633  0.0309
## 9    modularity Distance_from_equator   0.3103    0.0375 582     8.2663  0.0000
## 10       Soil_C                  TSEA   0.2732    0.0424 584     6.4479  0.0000

4. 绘图

# 获取 SEM 数据
dat0 <- coefs(sem.model1) |>
  select(Predictor, Response, Estimate, P.Value) |>
  mutate(Estimate = ifelse(Estimate == "-", NA, Estimate)) |>
  filter(Predictor != "status") |>
  setNames(nm = c("from0", "to0", "weight", "p"))

# 查看有哪些变量
unique(c(dat0$from0, dat0$to0))

##  [1] "fragility"             "cycfac_total"          "modularity"           
##  [4] "Distance_from_equator" "Elevation"             "TSEA"                 
##  [7] "MAP"                   "pH"                    "Texture"              
## [10] "Soil_C"                "biodiversity"

# 数据操作
# 这是为了下面的 as_tbl_graph 函数：它依据 from、to 列名确定网络的箭头方向
dat <- dat0 |> 
  mutate(
    from1 = case_when(
      from0 %in% c("Distance_from_equator", "Elevation", "TSEA", "MAP","Soil_C", "pH", "Texture") ~ "ENV",
      TRUE ~ from0
    ),
    to1 = case_when(
      to0 %in% c("Distance_from_equator", "Elevation", "TSEA", "MAP","Soil_C", "pH", "Texture") ~ "ENV",
      TRUE ~ to0
    ),
    from2 = case_when(
      from0 %in% c("TSEA", "MAP") ~ "Climate",
      from0 %in% c("Distance_from_equator", "Elevation") ~ "Space",
      TRUE ~ "Soil"
    ),
    to2 = case_when(
      to0 %in% c("TSEA", "MAP") ~ "Climate",
      to0 %in% c("Distance_from_equator", "Elevation") ~ "Space",
      TRUE ~ "Soil"
    )
  )

5. 绘图

5.1 先设置一个绘图主题，

# 设置一个全局可用的主题

font <- "sans"

theme_set(
  theme_void(
    base_family = font,
    base_size = 12
  ) +
    theme(
      plot.background = element_rect(color = NA, fill = "white")
    )
)

5.2 绘图 1：完整模型

注意到，ENV 到 modularity 和 biodiversity 等指标，有多条路径，

# 图 1
graph1 <- dat |>
  relocate(from = from1, to = to1) |>
  as_tbl_graph(directed = TRUE)

# 绘图 1
figure1 <- graph1 |>
  ggraph(layout = "circle") +
  geom_edge_parallel(
    aes(
      color = ifelse(weight > 0, "+", "-"), # 指定正、负相关性颜色
      width = abs(weight), # 宽度正比于路径系数的绝对值
      alpha = ifelse(p < 0.05, "P < 0.05", "P > 0.05"), # 不显示不显著路径，使得透明度为 0
      start_cap = label_rect(node1.name, padding = margin(10, 10, 10, 10)), # 箭头起始端部离节点的间距
      end_cap = label_rect(node2.name, padding = margin(10, 10, 10, 10)) # 箭头结束端离节点的间距
    ),
    arrow = arrow(angle = 60, length = unit(0.2, "line")), # 添加箭头
    sep = unit(0.4, "line") # 平形线的间距
  ) +
  # geom_edge_loop( # 存在循环（可以不使用 loop 这段代码）
  #   aes(
  #     strength = 0.9,
  #     direction = -135
  #   ),
  #   color = "grey",
  #   start_cap = circle(0),
  #   end_cap = circle(0.6),
  #   arrow = arrow(angle = 60, length = unit(0.4, "line"))
  # ) +
  geom_node_label( # 添加节点（变量）文字标签
    aes(label = name),
    size = 4,
    label.r = unit(0.5, "line"),
    family = font
  ) +
  scale_edge_color_manual(name = NULL, values = c("+" = "lightblue2", "-" = "grey0")) +
  scale_edge_width_continuous(name = NULL, limits = c(0, 1), range = c(0.2, 2.4)) +
  scale_edge_alpha_manual(guide = "none", values = c(1, 0)) +
  facet_grid(. ~ "SEM") +
  coord_equal(
    xlim = c(-1.1, 1.1),
    clip = "off"
  ) +
  # theme_minimal() + # 去掉注释，则方便查看图的坐标
  theme(
    strip.background = element_rect(),
    strip.text = element_text(margin = margin(5, 5, 5, 5)),
    legend.position = "none"
  )
figure1

5.3 绘图 2：子模型

土壤子模型，

# 图 2
graph2 <- dat |>
  filter(from1 == "ENV", to1 == "ENV") |>
  relocate(from = from2, to = to2, weight, p) |>
  as_tbl_graph(directed = TRUE)

# 绘图 2
# 代码注释见上一段
figure2 <- graph2 |>
  ggraph(layout = "stress") +
  geom_edge_parallel(
    aes(
      color = ifelse(weight > 0, "+", "-"),
      width = abs(weight),
      alpha = ifelse(p < 0.05, "P < 0.05", "P > 0.05"),
      start_cap = label_rect(node1.name, padding = margin(10, 10, 10, 10)),
      end_cap = label_rect(node2.name, padding = margin(10, 10, 10, 10))
    ),
    label_push = unit(0.5, "line"),
    angle_calc = "along",
    arrow = arrow(angle = 60, length = unit(0.2, "line")),
    sep = unit(0.4, "line")
  ) +
  # geom_edge_loop(
  #   aes(
  #     strength = 0.9,
  #     direction = c(rep(90, 10), rep(-90, 9))
  #   ),
  #   color = "grey",
  #   start_cap = circle(0),
  #   end_cap = circle(0.6),
  #   arrow = arrow(angle = 60, length = unit(0.4, "line"))
  # ) +
  # geom_node_point(size = 8) +
  geom_node_label(
    aes(label = name),
    size = 4,
    label.r = unit(0.5, "line"),
    family = font,
    # vjust = 0
  ) +
  scale_edge_color_manual(name = NULL, values = c("+" = "lightblue2", "-" = "grey0")) +
  scale_edge_width_continuous(name = NULL, limits = c(0, 1), range = c(0.2, 2.4)) +
  scale_edge_alpha_manual(guide = "none", values = c(1, 0)) +
  facet_grid(. ~ "Soil sub-model") +
  coord_equal(
    clip = "off"
  ) +
  # theme_minimal() +
  theme(
    strip.background = element_rect(),
    strip.text = element_text(margin = margin(5, 5, 5, 5)),
    legend.position = "none"
  )
figure2

5.4 其他代码

标记路径系数有点麻烦了，连线太多！可以考虑合并路径，然后在旁边（手动）列出系数——正如论文中那样，

但此时，需要面对一个问题，即：路径宽度（width）怎么确定？是所有子路径系数的和，还是子路径系数绝对值的和？

打赏并私信发来您的邮箱，可获取剩余完整代码（通过邮件发送）

figure3

结语

还可以继续修改。如果想不离开 R 语言直接调整布局，可以参考之前的另一篇公众号文章（独家 R 语言小工具：结构方程模型从拟合到作图，完全使用 R 解决！但遗憾的是，此工具没有针对这种节点之间多条连接的情景做优化），

# 保存为 .csv 表格，即可以用 Shiny APP 小工具调整布局
dat |>
  relocate(from = from1, to = to1) |>
  write_csv("graph1-sem-table.csv")

继续滑动看下一个

ecologyR

向上滑动看下一个

高三女生醉酒后被强奸致死？检方回应

常德悲剧：让谴责无差别杀戮之声更加响亮一点

2024【公共营养师】培训报名通道已开启，不限学历，23岁及以上可报！还能领2000补贴

【惊】"以为要写遗书"! 飞温哥华航班遇炸弹气旋华人崩溃大哭连空姐都吐了; 客机颠簸盘旋3小时

女人最偏爱的十种男人

R 语言结构方程模型：尝试用 R 画一个 PNAS 文章的 SEM 图

引言

0. 包

1. 数据

2. 结构方程模型

3. 查看模型结果

4. 绘图

5. 绘图

5.1 先设置一个绘图主题，

5.2 绘图 1：完整模型

5.3 绘图 2：子模型

5.4 其他代码

结语

您可能也对以下帖子感兴趣

高三女生醉酒后被强奸致死？检方回应

常德悲剧：让谴责无差别杀戮之声更加响亮一点

2024【公共营养师】培训报名通道已开启，不限学历，23岁及以上可报！还能领2000补贴

【惊】"以为要写遗书"! 飞温哥华航班遇炸弹气旋 华人崩溃大哭 连空姐都吐了; 客机颠簸盘旋3小时

女人最偏爱的十种男人

生成图片，分享到微信朋友圈

R 语言结构方程模型：尝试用 R 画一个 PNAS 文章的 SEM 图

引言

0. 包

1. 数据

2. 结构方程模型

3. 查看模型结果

4. 绘图

5. 绘图

5.1 先设置一个绘图主题，

5.2 绘图 1：完整模型

5.3 绘图 2：子模型

5.4 其他代码

结语

您可能也对以下帖子感兴趣

【惊】"以为要写遗书"! 飞温哥华航班遇炸弹气旋华人崩溃大哭连空姐都吐了; 客机颠簸盘旋3小时